knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, cache = TRUE)
knitr::opts_knit$set(root.dir = '/Volumes/JARC_2T/NSF-CREST_Postdoc/LongTerm_Hypoxia_exp/AplCal_hypoxia/miRNA/')
# Load libraries
library(DESeq2) # BiocManager::install('DESeq2')
library(limma) # BiocManager::install('limma')
library(EnhancedVolcano) # BiocManager::install('EnhancedVolcano')
library(stringr)
library(readxl)
library(pheatmap)
library(RColorBrewer)
library(variancePartition) # BiocManager::install('variancePartition')
library(doParallel)
library(cowplot)
library(adegenet)
library(ggpubr)
library(kableExtra)
library(tidyverse)
library(ggfortify) # install.packages("ggfortify")
library(Biobase)
library(arrayQualityMetrics)
library(vegan)
library(ggvenn) # install.packages("ggvenn")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("gaospecial/ggVennDiagram")
library(ggVennDiagram)
## ggplot theme
theme_custom <- function() {
theme_bw(base_size = 10) %+replace% #, base_family = "Arial"
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(color = "black", fill = NA),
legend.background = element_rect(fill = NA, colour = NA),
axis.text.x = element_text(angle=45, hjust=1, vjust = 1)
)
}
## ggplot labeller
colnames <- c(
`A` = "abdominal",
`Pp` = "pleural & pedal"
)
parents <- c(
`60` = "lab-reared",
`71` = "wild"
)
global_labeller <- labeller(
tissue = colnames,
batch = parents,
.default = "label_parsed"
)
annot <- read.delim("data/Results.txt", sep = "\t")
miRNA <- annot[which(annot$MIRNA == "Y"), c(2,21)]
knownmiRNA <- na.omit(miRNA)
predictmiRNA <- miRNA[which(is.na(miRNA$KnownRNAs)),]
doubt_miRNA <- annot[,c(2,20:21)]
doubt_miRNA <- na.omit(doubt_miRNA)
doubt_miRNA <- doubt_miRNA[which(doubt_miRNA$MIRNA == "N"),]
annot_known <- annot
annot_known$geneID <- annot$Name
annot_known$geneID <- gsub(knownmiRNA$Name, knownmiRNA$KnownRNAs, annot$Name)
annot_known <- annot_known[, c(2,22)]
count <- read.delim("data/Counts.txt", sep = "\t")
count <- merge(count, annot_known, by = "Name")
rownames(count) <- count$geneID
countMatrix <- count[,-c(1:3,24,34)] # eliminate the outlier sample Ac204A
countMatrix <- countMatrix[, order(names(countMatrix))]
miRNA_counts <- count[which(count$MIRNA == "Y"),]
miRNA_countMatrix <- miRNA_counts[,-c(1:3,24,34)]
doubt_miRNA_counts <- count[which(count$Name %in% doubt_miRNA$Name),]
doubt_miRNA_countMatrix <- doubt_miRNA_counts[,-c(1:3,24,34)]
# Import sample metadata
sdat0 <- read_csv("data/coldata.csv")
colnames(sdat0) <- c("FileName","sample_ID","animal_ID","tissue","treatment","batch","sample_ID")
sdat0$treatment <- gsub("H_6h", "Hyp_T6h", sdat0$treatment)
sdat0$treatment <- gsub("H_6-days", "LtH_6", sdat0$treatment)
sdat0 <- sdat0[-15,]
# add sample IDs to count matrix
colnames(countMatrix) <- sdat0$sample_ID
colnames(miRNA_countMatrix) <- sdat0$sample_ID
colnames(doubt_miRNA_countMatrix) <- sdat0$sample_ID
# subset coldata to include only abdominal samples
sdat0 <- sdat0 %>%
filter(tissue=="Abdominal") %>%
mutate(batch.group = interaction(batch, treatment)) %>%
mutate_if(is.character, as.factor) %>%
mutate(batch=as.factor(batch))
sdat <- sdat0 %>% # order rows by sample name
column_to_rownames(var = "sample_ID") # set sample name to rownames
countMatrix <- countMatrix[, colnames(countMatrix) %in% sdat0$sample_ID]
miRNA_countMatrix <- miRNA_countMatrix[, colnames(miRNA_countMatrix) %in% sdat0$sample_ID]
doubt_miRNA_countMatrix <- doubt_miRNA_countMatrix[, colnames(doubt_miRNA_countMatrix) %in% sdat0$sample_ID]
save(sdat,sdat0,countMatrix,miRNA_countMatrix,doubt_miRNA_countMatrix, file = "output/counts_and_meta.RData")
#sdat$treatment <- factor(sdat$treatment, levels = c("Control","Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery"), ordered = T)
#sdat$group <- paste(sdat$treatment, sdat$batch, sdat$tissue, sep = "")
# load(file = "output/counts_and_meta.RData")
# Create full DESeqDataSet
dds <- DESeqDataSetFromMatrix(countData = countMatrix,
colData = sdat,
design = ~ batch+treatment)
# Subset DESeqDataSet
## Subset for batch and ganglia
dds_60 <- dds[, colData(dds)$batch == 60]
dds_71 <- dds[, colData(dds)$batch == 71]
### Create DESeqDataSet for miRNA only
dds_miRNA <- DESeqDataSetFromMatrix(countData = miRNA_countMatrix,
colData = sdat,
design = ~ batch+treatment)
dds_miRNA <- dds_miRNA[ rowMeans(counts(dds_miRNA)) > 1, ]
vsd_miRNA <- varianceStabilizingTransformation(dds_miRNA, blind = TRUE)
# Remove genes with less than 1 mean count across samples
dds <- dds[ rowMeans(counts(dds)) > 1, ]
dds_60 <- dds_60[ rowMeans(counts(dds_60)) > 1, ]
dds_71 <- dds_71[ rowMeans(counts(dds_71)) > 1, ]
# Normalize expression data for visualization purposes using VST tranformation
vsd <- vst(dds, blind = TRUE)
vsd_60 <- vst(dds_60, blind = TRUE)
vsd_71 <- vst(dds_71, blind = TRUE)
## Calculate distances among samples
sampleDists <- dist(t(assay(vsd)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)
## Calculate MDS
mds <- as.data.frame(colData(vsd)) %>%
cbind(cmdscale(sampleDistMatrix)) %>%
mutate(fillcol = factor(ifelse(batch == 60, 1, 0)))
# Calculate group centroids for plotting
mds <- mds %>%
group_by(batch, treatment) %>%
dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%
full_join(mds)
# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]
# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment, shape = batch)) +
geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2),
lwd = 0.25, col = "grey") +
geom_point(size = 2, aes(x = c1, y = c2, fill = treatment)) +
geom_point(size = 0.7, aes(x = `1`, y = `2`, fill = treatment), show.legend = F) +
scale_alpha_manual(values = c(1, 0), name = "batch", labels = c("lab_reared parents", "wild parents")) +
guides(shape = guide_legend(order = 2, override.aes = list(fill = "black"))) +
guides(alpha = guide_legend(override.aes = list(shape = 21, alpha = 1, fill = c("black", "white")))) +
labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
theme_custom() +
theme(legend.spacing.y = unit(0, "cm"))
pcoa
#ggsave(filename = "figures/Fig1.pcoa_sncRNA_GE.png",pcoa, width = 5, height = 5)
The PCoA shows that sncRNA expression varies significantly by batch along PC1, and both batched show a separation by treatment mostly across PC2
## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_miRNA)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)
## Calculate MDS
mds <- as.data.frame(colData(vsd_miRNA)) %>%
cbind(cmdscale(sampleDistMatrix)) %>%
mutate(fillcol = factor(ifelse(batch == 60, 1, 0)))
# Calculate group centroids for plotting
mds <- mds %>%
group_by(batch, treatment) %>%
dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%
full_join(mds)
# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]
# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment, shape = batch)) +
geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2),
lwd = 0.25, col = "grey") +
geom_point(size = 2, aes(x = c1, y = c2, fill = treatment)) +
geom_point(size = 0.7, aes(x = `1`, y = `2`, fill = treatment), show.legend = F) +
scale_alpha_manual(values = c(1, 0), name = "batch", labels = c("lab_reared parents", "wild parents")) +
guides(shape = guide_legend(order = 2, override.aes = list(fill = "black"))) +
guides(alpha = guide_legend(override.aes = list(shape = 21, alpha = 1, fill = c("black", "white")))) +
labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
theme_custom() +
theme(legend.spacing.y = unit(0, "cm"))
pcoa
#ggsave(filename = "figures/Fig.pcoa_miRNA_only_GE.png",pcoa, width = 5, height = 5)
# Run DESeq pipeline
design(dds) <- formula(~ batch.group)
dsr1 <- DESeq(dds)
# Define group contrasts
group.contrasts <- tibble(num = c("Hyp_T6h","LtH_6","Recovery","Recovery","LtH_6","Recovery"),
den = c("Control","Control","Control","Hyp_T6h","Hyp_T6h","LtH_6"))
# Get DESeq results for all group contrasts for each tissue
DE <- crossing(batch = c("60", "71"), group.contrasts) %>%
mutate(dsr = pmap(list(batch, num, den), function(x, y, z) {
results(dsr1, contrast = c("batch.group", paste0(x, ".", c(y, z))))}))
# Run DESeq pipeline for both batches together
design(dds) <- formula(~ batch + treatment)
dsr2 <- DESeq(dds)
# Get DESeq results for all contrasts and join with results from each colony, from above
DE <- crossing(batch = "all", group.contrasts) %>%
mutate(dsr = map2(num, den, ~ results(dsr2, contrast = c("treatment", .x, .y)))) %>%
bind_rows(DE)
## Plot results
DE_60 <- DE[which(DE$batch=="60"),]
names= paste(DE_60$num," vs ",DE_60$den)
volcano_List = list()
for (i in c(1:6)){
name = names[i]
rdf=data.frame(DE_60$dsr[[i]])
plt=EnhancedVolcano(rdf,
lab = rownames(rdf),
x = "log2FoldChange",
y = "padj",
legendPosition = 'none',
title = name,
subtitle = NULL,
caption = NULL)
volcano_List[[i]] = plt
}
volc_plot1 <- plot_grid(plotlist=volcano_List, nrow=2)
volc_plot1
DE_71 <- DE[which(DE$batch=="71"),]
names= paste(DE_71$num," vs ",DE_71$den)
volcano_List = list()
for (i in c(1:6)){
name = names[i]
rdf=data.frame(DE_71$dsr[[i]])
plt=EnhancedVolcano(rdf,
lab = rownames(rdf),
x = "log2FoldChange",
y = "padj",
legendPosition = 'none',
title = name,
subtitle = NULL,
caption = NULL)
volcano_List[[i]] = plt
}
volc_plot2 <- plot_grid(plotlist=volcano_List, nrow=2)
volc_plot2
#ggsave(filename = "figures/Fig.volcano_naive_GE.png",volc_plot1, width = 15, height = 10)
#ggsave(filename = "figures/Fig.volcano_pre-exp_GE.png",volc_plot2, width = 15, height = 10)
# Run DESeq pipeline
design(dds_miRNA) <- formula(~ batch.group)
dsr1 <- DESeq(dds_miRNA)
# Define group contrasts
group.contrasts <- tibble(num = c("Hyp_T6h","LtH_6","Recovery","Recovery","LtH_6","Recovery"),
den = c("Control","Control","Control","Hyp_T6h","Hyp_T6h","LtH_6"))
# Get DESeq results for all group contrasts for each tissue
DE_miRNA <- crossing(batch = c("60", "71"), group.contrasts) %>%
mutate(dsr = pmap(list(batch, num, den), function(x, y, z) {
results(dsr1, contrast = c("batch.group", paste0(x, ".", c(y, z))))}))
# Run DESeq pipeline for both batches together
design(dds_miRNA) <- formula(~ batch + treatment)
dsr2 <- DESeq(dds_miRNA)
# Get DESeq results for all contrasts and join with results from each colony, from above
DE_miRNA <- crossing(batch = "all", group.contrasts) %>%
mutate(dsr = map2(num, den, ~ results(dsr2, contrast = c("treatment", .x, .y)))) %>%
bind_rows(DE_miRNA)
## Plot results
DE_miRNA60 <- DE_miRNA[which(DE_miRNA$batch=="60"),]
names= paste(DE_miRNA60$num," vs ",DE_miRNA60$den)
volcano_List = list()
for (i in c(1:6)){
name = names[i]
rdf=data.frame(DE_miRNA60$dsr[[i]])
plt=EnhancedVolcano(rdf,
lab = rownames(rdf),
x = "log2FoldChange",
y = "padj",
pCutoff = 0.1,
legendPosition = 'none',
title = name,
subtitle = NULL,
caption = NULL)
volcano_List[[i]] = plt
}
volc_plot1 <- plot_grid(plotlist=volcano_List, nrow=2)
volc_plot1
DE_miRNA71 <- DE_miRNA[which(DE_miRNA$batch=="71"),]
names= paste(DE_miRNA71$num," vs ",DE_miRNA71$den)
volcano_List = list()
for (i in c(1:6)){
name = names[i]
rdf=data.frame(DE_miRNA71$dsr[[i]])
plt=EnhancedVolcano(rdf,
lab = rownames(rdf),
x = "log2FoldChange",
y = "padj",
pCutoff = 0.1,
legendPosition = 'none',
title = name,
subtitle = NULL,
caption = NULL)
volcano_List[[i]] = plt
}
volc_plot2 <- plot_grid(plotlist=volcano_List, nrow=2)
volc_plot2
#ggsave(filename = "figures/Fig.volcano_miRNA_naive_GE.png",volc_plot1, width = 15, height = 10)
#ggsave(filename = "figures/Fig.volcano_miRNA_pre-exp_GE.png",volc_plot2, width = 15, height = 10)
DE <- DE %>%
mutate(sig = map(dsr, ~ rownames_to_column(data.frame(.[which(.$padj < 0.05), ]), "gene")),
up = map(sig, ~ filter(., log2FoldChange > 0)),
down = map(sig, ~ filter(., log2FoldChange < 0)))
# Generate logP values for all differential expression contrasts
DE <- DE %>% mutate(logP = map(dsr, ~ data.frame(
gene = rownames(data.frame(.)),
logP = -log10(data.frame(.)$pvalue) * sign(data.frame(.)$log2FoldChange))))
# Count number of differentially expressed genes within each tissue, and overall, for each contrast
DEtab <- DE %>%
filter((num == "Hyp_T6h" & den == "Control")|
(num == "LtH_6" & den == "Control")|
(num == "Recovery" & den == "Control")|
(num == "Recovery" & den == "Hyp_T6h")|
(num == "LtH_6" & den == "Hyp_T6h")|
(num == "Recovery" & den == "LtH_6")) %>%
mutate(nsig = map_dbl( sig, ~ nrow(.)),
nup = map_dbl( up, ~ nrow(.)),
ndn = map_dbl(down, ~ nrow(.)),
`DEGs [up, down]` = paste0(nsig, " [", nup, ", ", ndn, "]")) %>%
mutate(group = case_when(batch == "60" ~ "Naive", batch == "71" ~ "Pre-exposed",
batch == "all" ~ "all groups")) %>%
mutate(group = factor(group, levels = c("Naive", "Pre-exposed", "all groups"))) %>%
select(group, num, den, `DEGs [up, down]`) %>%
spread(group, `DEGs [up, down]`)
DEtab$contrast <- paste(DEtab$num, DEtab$den, sep = " vs ")
dek <- DEtab %>% select(c(6,3:5)) %>%
knitr::kable(format = "markdown", caption = "Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.", row.names = ) %>%
kable_styling(full_width = TRUE)
dek
| contrast | Naive | Pre-exposed | all groups |
|---|---|---|---|
| Hyp_T6h vs Control | 211 [81, 130] | 152 [67, 85] | 270 [112, 158] |
| LtH_6 vs Control | 171 [70, 101] | 256 [174, 82] | 236 [125, 111] |
| LtH_6 vs Hyp_T6h | 28 [17, 11] | 137 [103, 34] | 13 [4, 9] |
| Recovery vs Control | 217 [82, 135] | 247 [145, 102] | 405 [193, 212] |
| Recovery vs Hyp_T6h | 35 [13, 22] | 127 [49, 78] | 8 [3, 5] |
| Recovery vs LtH_6 | 48 [14, 34] | 20 [10, 10] | 11 [4, 7] |
DEGs_60 <- DE %>%
unnest(sig) %>%
filter(batch == "60") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
select(.,contrast, gene)
DEGs_60_wide <- list(Hyp_T6h=pull(DEGs_60[which(DEGs_60$contrast=="Hyp_T6h.Control"),2]),
Hyp_T6d=pull(DEGs_60[which(DEGs_60$contrast=="LtH_6.Control"),2]))
str(DEGs_60_wide)
## List of 2
## $ Hyp_T6h: chr [1:211] "Cluster_26" "Cluster_28" "Cluster_58" "Cluster_61" ...
## $ Hyp_T6d: chr [1:171] "Cluster_26" "Cluster_57" "Cluster_58" "Cluster_94" ...
gv1 <- ggvenn(DEGs_60_wide, fill_color = c("#0073C2FF", "#EFC000FF"),
stroke_size = 0.5, set_name_size = 4, show_percentage = F, stroke_color = F
)
gv1
#ggsave("figures/Naive_DEGs_vennDiagram.png", gv1, height = 3, width = 5)
DEGs_71 <- DE %>%
unnest(sig) %>%
filter(batch == "71") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
select(.,contrast, gene)
DEGs_71_wide <- list(Hyp_T6h=pull(DEGs_71[which(DEGs_71$contrast=="Hyp_T6h.Control"),2]),
Hyp_T6d=pull(DEGs_71[which(DEGs_71$contrast=="LtH_6.Control"),2]))
str(DEGs_71_wide)
## List of 2
## $ Hyp_T6h: chr [1:152] "Cluster_26" "Cluster_29" "Cluster_33" "Cluster_58" ...
## $ Hyp_T6d: chr [1:256] "Cluster_25" "Cluster_26" "Cluster_33" "Cluster_34" ...
gv2 <- ggvenn(DEGs_71_wide, fill_color = c("#0073C2FF", "#EFC000FF"),
stroke_size = 0.5, set_name_size = 4, show_percentage = F, stroke_color = F
)
gv2
#ggsave("figures/Pre_exposed_DEGs_vennDiagram.png", gv2, height = 3, width = 5)
DEGs <- DE %>%
unnest(sig) %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
filter(contrast %in% c("Hyp_T6h.Control", "LtH_6.Control")) %>%
select(.,contrast, gene, batch)
DEGs <- list(wild=pull(DEGs[which(DEGs$batch=="71"),2]),
lab_reared=pull(DEGs[which(DEGs$batch=="60"),2]))
str(DEGs)
## List of 2
## $ wild : chr [1:408] "Cluster_26" "Cluster_29" "Cluster_33" "Cluster_58" ...
## $ lab_reared: chr [1:382] "Cluster_26" "Cluster_28" "Cluster_58" "Cluster_61" ...
gv3 <- ggvenn(DEGs, fill_color = c("#0073C2FF", "#EFC000FF"),
stroke_size = 0.5, set_name_size = 4, show_percentage = F, stroke_color = F
)
gv3
#ggsave("figures/all_DEGs_vennDiagram.png", gv3, height = 3, width = 5)
Similarity of differentially expressed sncRNAs across batches and contrasts
# Find genes that are DE in same direction in both batches when analyzed individually
DE %>%
unnest(sig) %>%
filter(batch != "all") %>%
group_by(num, den) %>%
dplyr::count(gene, log2FoldChange > 0) %>% # is same gene DE in same direction?
summarise(`shared` = sum(n==2)) %>%
knitr::kable(format="markdown", caption = "Number of differentially expressed small RNAs in common between Naive and Pre-exposed animals") %>%
kable_styling(full_width = TRUE)
| num | den | shared |
|---|---|---|
| Hyp_T6h | Control | 83 |
| LtH_6 | Control | 82 |
| LtH_6 | Hyp_T6h | 2 |
| Recovery | Control | 84 |
| Recovery | Hyp_T6h | 1 |
| Recovery | LtH_6 | 0 |
DEGs_60 <- DE %>%
unnest(sig) %>%
filter(batch == "60") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
select(.,contrast, gene, log2FoldChange)
#write_tsv(DEGs_60, path = "output/DEGs_by_contrast_Naive.tsv")
DEGs_71 <- DE %>%
unnest(sig) %>%
filter(batch == "71") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
select(.,contrast, gene, log2FoldChange)
#write_tsv(DEGs_71, path = "output/DEGs_by_contrast_batch 71_pre-exposed.tsv")
# Shared
DEGs_shared <- DE %>%
unnest(sig) %>%
filter(batch != "all") %>%
mutate(contrast=paste(num, den, sep = ".")) %>%
select(.,contrast, batch, gene, log2FoldChange) %>%
group_by(.,contrast) %>%
dplyr::count(gene, log2FoldChange > 0) %>%
filter(n != 1)
colnames(DEGs_shared) <- c("contrast", "gene", "reg", "n")
DEGs_shared$reg <- gsub(TRUE, "Up", DEGs_shared$reg)
DEGs_shared$reg <- gsub(FALSE, "Down", DEGs_shared$reg)
DEGs_shared <- DEGs_shared[,-4]
#write_tsv(DEGs_shared, path = "output/DEGs_shared_batches.tsv")
in batch 60 abdominal
genesincommon_60 <- DE %>%
unite("contrast", num, den, sep = ".") %>%
filter(batch == "60", contrast %in% c("Hyp_T6h.Control", "LtH_6.Control")) %>%
unnest(sig) %>%
select(contrast, gene, log2FoldChange) %>%
pivot_wider(names_from = contrast, values_from = log2FoldChange) %>%
drop_na() %>%
mutate(gene = factor(gene, levels = gene))
genesincommon_60 <- genesincommon_60[c(1:100),]
# Get normalized counts for the genes in common
gcounts <- data.frame(assay(vsd_60[which(rownames(assay(vsd_60)) %in% genesincommon_60$gene), ]))
gcounts <- rownames_to_column(gcounts, var = "gene") %>%
pivot_longer(-gene, names_to = "sample_ID", values_to = "count") %>%
left_join(select(sdat0, sample_ID, treatment)) %>%
mutate(gene = factor(gene, levels = levels(genesincommon_60$gene)))
# Get group log2foldchanges for plotting
gl2fc <- genesincommon_60 %>%
pivot_longer(-gene, names_to = "treatment", values_to = "l2fc") %>%
mutate(treatment = gsub(".Control", "", treatment))
# Get group means for plotting
gcountsmean <- gcounts %>%
group_by(gene, treatment) %>%
summarise(count = mean(count))
# Plot
plotgcounts <- gcounts %>%
ggplot(aes(x = treatment, y = pmax(0,log(count)))) +
geom_jitter(width = 0.2, aes(color = treatment)) +
geom_line(data = gcountsmean, aes(group = gene)) +
geom_text(data = gl2fc, aes(y = 2.4, label = round(l2fc, 2))) +
facet_wrap(~ gene) +
theme_custom() +
theme(legend.position = "none") +
labs(x = "", y = "log(Variance stabilized counts)") +
scale_y_continuous(limits = c(0, 2.6))
plotgcounts
#ggsave(file = "figures/Fig_shared_sncRBA_hyp_60.png", width = 360, height = 360, units = "mm")
in batch 71
genesincommon_71 <- DE %>%
unite("contrast", num, den, sep = ".") %>%
filter(batch == "71", contrast %in% c("Hyp_T6h.Control", "LtH_6.Control")) %>%
unnest(sig) %>%
select(contrast, gene, log2FoldChange) %>%
pivot_wider(names_from = contrast, values_from = log2FoldChange) %>%
drop_na() %>%
mutate(gene = factor(gene, levels = gene))
# Get normalized counts for the genes in common
gcounts <- data.frame(assay(vsd_71[which(rownames(assay(vsd_71)) %in% genesincommon_71$gene), ]))
gcounts <- rownames_to_column(gcounts, var = "gene") %>%
pivot_longer(-gene, names_to = "sample_ID", values_to = "count") %>%
left_join(select(sdat0, sample_ID, treatment)) %>%
mutate(gene = factor(gene, levels = levels(genesincommon_71$gene)))
# Get group log2foldchanges for plotting
gl2fc <- genesincommon_71 %>%
pivot_longer(-gene, names_to = "treatment", values_to = "l2fc") %>%
mutate(treatment = gsub(".Control", "", treatment))
# Get group means for plotting
gcountsmean <- gcounts %>%
group_by(gene, treatment) %>%
summarise(count = mean(count))
# Plot
plotgcounts <- gcounts %>%
ggplot(aes(x = treatment, y = pmax(0,log(count)))) +
geom_jitter(width = 0.2, aes(color = treatment)) +
geom_line(data = gcountsmean, aes(group = gene)) +
geom_text(data = gl2fc, aes(y = 2.4, label = round(l2fc, 2))) +
facet_wrap(~ gene) +
theme_custom() +
theme(legend.position = "none") +
labs(x = "", y = "log(Variance stabilized counts)") +
scale_y_continuous(limits = c(0, 2.6))
plotgcounts
#ggsave(file = "figures/Fig_shared_sncRNA_hyp_71.png", width = 360, height = 360, units = "mm")